from pathlib import Path

from llama_index.readers.file import PDFReader
from llama_index.readers.file import PyMuPDFReader
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core import  GPTVectorStoreIndex,VectorStoreIndex
from llama_index.llms import openai_like
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding  # HuggingFaceEmbedding:用于将文本转换为词向量
from llama_index.llms.huggingface import HuggingFaceLLM  # HuggingFaceLLM：用于运行Hugging Face的预训练语言模型
from llama_index.core import Settings,SimpleDirectoryReader,VectorStoreIndex
import chromadb
from llama_index.embeddings.dashscope import DashScopeEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.llms.deepseek  import DeepSeek
from llama_index.embeddings.fastembed import FastEmbedEmbedding


llm = DeepSeek(model="deepseek-chat", api_key="sk-605e60a1301040759a821b6b677556fb")
Settings.llm = llm
embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model



loader = PyMuPDFReader()
# docs0 = loader.load_data(file=Path("./data/llama2.pdf"))
docs0 = loader.load(file_path=Path("./data/菁农安全用电对接协议说明_20250406.pdf"))

from llama_index.core import Document

doc_text = "\n\n".join([d.get_content() for d in docs0])
docs = [Document(text=doc_text)]

 

from llama_index.core.node_parser import (
    HierarchicalNodeParser,
    SentenceSplitter,
)

node_parser = HierarchicalNodeParser.from_defaults()

nodes = node_parser.get_nodes_from_documents(docs)

 

from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes
leaf_nodes = get_leaf_nodes(nodes)

 

root_nodes = get_root_nodes(nodes)
 

from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core import StorageContext
from llama_index.llms.openai import OpenAI

docstore = SimpleDocumentStore()

# insert nodes into docstore
docstore.add_documents(nodes)

# define storage context (will include vector store by default too)
storage_context = StorageContext.from_defaults(docstore=docstore)

from llama_index.core import VectorStoreIndex

base_index = VectorStoreIndex(
    leaf_nodes,
    storage_context=storage_context,
)

base_retriever = base_index.as_retriever(similarity_top_k=6)


from llama_index.core.postprocessor import LLMRerank, SentenceTransformerRerank
from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core.query_engine import RetrieverQueryEngine

# reranker = LLMRerank()
# reranker = SentenceTransformerRerank(top_n=10)
reranker = CohereRerank(top_n=10)


query_engine = RetrieverQueryEngine(base_retriever, node_postprocessors=[reranker])
rs=query_engine.query("登录名什么？")
print(rs)


